In addition to analyzing race relations between soldiers, we want to see how gender roles were discussed amongst soldiers. Our particular focus is on women, but both male and female roles are explored.
# Visualizing Bigrams functions
# From tidytext documentation
# Draw a directed network of bigrams.
#   bigrams: edge list whose first two columns are the two words and which
#            carries a count column `n` (used for edge transparency).
#   title:   plot title.
# Returns the ggraph plot object.
visualize_bigrams <- function(bigrams, title) {
  # Seed fixed before the "fr" layout is computed so the plot is reproducible
  set.seed(2020)
  edge_arrow <- grid::arrow(type = "closed", length = unit(.15, "inches"))
  bigram_graph <- graph_from_data_frame(bigrams)

  ggraph(bigram_graph, layout = "fr") +
    geom_edge_link(aes(edge_alpha = n), show.legend = FALSE, arrow = edge_arrow) +
    geom_node_point(color = "lightblue", size = 5) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
    theme_void() +
    theme(
      legend.position = "none",
      plot.title = element_text(hjust = 0.5)
    ) +
    ggtitle(title)
}
# Pairwise word correlations for words matching a dictionary pattern.
#   data:       table with a `text` column; each row is treated as one section.
#   dictionary: regex; only pairs whose `item1` matches it are returned.
#   n:          minimum number of occurrences a (stemmed) word needs to be kept.
#   corr:       minimum correlation (inclusive) for a pair to be kept.
# Returns the filtered output of pairwise_cor() (item1, item2, correlation).
cooccur_gender <- function(data, dictionary, n = 5, corr = .15) {
  section_words <- data %>%
    mutate(section = row_number()) %>%
    filter(section > 0) %>%
    unnest_tokens(word, text) %>%
    filter(!word %in% stop_words$word) %>%
    mutate(word = stem_words(lemmatize_words(word)))

  # `n()` is the per-word row count; the bare `n` is this function's argument
  frequent_words <- section_words %>%
    group_by(word) %>%
    filter(n() >= n)

  word_cors <- pairwise_cor(frequent_words, word, section, sort = TRUE)

  word_cors %>%
    filter(grepl(dictionary, item1), correlation >= corr)
}
# Draw a network of co-occurring words.
#   data:  edge list whose first two columns are the word pair and which has a
#          `correlation` column (used for edge transparency).
#   title: plot title.
# Returns the ggraph plot object.
visualize_cooccur <- function(data, title) {
  # Seed fixed before the "fr" layout is computed so the plot is reproducible
  set.seed(2020)
  cooccur_graph <- graph_from_data_frame(data)

  ggraph(cooccur_graph, layout = "fr") +
    geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
    geom_node_point(color = "lightblue", size = 5) +
    geom_node_text(aes(label = name), repel = TRUE) +
    theme_void() +
    ggtitle(title)
}
# Connect to the project PostgreSQL database. Credentials are read from the
# db_userid / db_pwd environment variables rather than being hard-coded.
conn <- dbConnect(drv = PostgreSQL(),
                  dbname = "sdad",
                  host = "10.250.124.195",
                  port = 5432,
                  user = Sys.getenv("db_userid"),
                  password = Sys.getenv("db_pwd"))
# Query the american_soldier.survey_32_clean table. `finally` guarantees the
# connection is closed even when the query fails.
data <- tryCatch(
  dbGetQuery(conn, "SELECT * FROM american_soldier.survey_32_clean"),
  finally = dbDisconnect(conn)
)
## [1] TRUE
##### some data cleaning
# The same three passes are applied to each free-text column, in this order:
#   1. drop every "'s" (possessives)
#   2. replace "+" with "and"
#   3. remove all remaining symbols except periods
clean_comment <- function(txt) {
  txt <- str_replace_all(txt, "'s", "")
  txt <- str_replace_all(txt, "\\+", "and")
  str_replace_all(txt, "[^[:alnum:][:space:].]", "")
}
data$outfits_comment <- clean_comment(data$outfits_comment)
data$long <- clean_comment(data$long)
#### create custom stop words
custom_stop <- c(stop_words$word, "question", "questionnaire", "answer")
The following analysis will explore which words are paired and associated with female and male words.
A dictionary of gender words is loaded to identify which responses relate to gender topics.
# Load the gender dictionary (comma-separated) and preview its first rows
gender_words <- fread("~/git/dspg2020amsoldier/data/dictionary/gender.csv", sep = ",")
head(gender_words)
## gender category
## 1: androgynous identity
## 2: blackboi male
## 3: blackgirl female
## 4: blackman male
## 5: blackwoman female
## 6: boy male
# One alternation regex: every dictionary term wrapped in word boundaries
gender_match <- paste0("\\b", gender_words$gender, "\\b", collapse = "|")
#subset data based on question and race
# Each subset becomes a two-column tibble: a running row id (`nrow`) and the
# response `text`. seq_len() is used instead of 1:nrow() so that an empty
# subset yields an empty id vector rather than c(1, 0).
S32W_short <- data %>%
  filter(racial_group == "white") %>%
  select(outfits_comment) %>%
  filter(!is.na(outfits_comment))
S32W_short <- tibble(nrow = seq_len(nrow(S32W_short)), text = S32W_short$outfits_comment)
S32W_long <- data %>%
  filter(racial_group == "white") %>%
  select(long) %>%
  filter(!is.na(long))
S32W_long <- tibble(nrow = seq_len(nrow(S32W_long)), text = S32W_long$long)
S32N_long <- data %>%
  filter(racial_group == "black") %>%
  select(long) %>%
  filter(!is.na(long))
S32N_long <- tibble(nrow = seq_len(nrow(S32N_long)), text = S32N_long$long)
To view commentary on gender relations, we will look at the bi-grams which will show which words appear next to gender related words.
#### unnest tokens into bigrams
#### counts bigrams, but filter for those that have words in the gender dictionary.
#### separates bigrams into word1 and word 2 column
#### removes rows that contain a stopword
#### lem then stem words
#### finally count the bigrams.
# Gender-dictionary bigrams in the S32W_short text
short_bigrams_gender <- S32W_short %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(gender_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Stemming can merge distinct raw bigrams, so their counts are summed;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Gender-dictionary bigrams in the S32W_long text
longW_bigrams_gender <- S32W_long %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(gender_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Gender-dictionary bigrams in the S32N_long text
longN_bigrams_gender <- S32N_long %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(gender_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
In the white soldiers' responses on the outfits separation question, we can see that the associations to gendered words are primarily related to racial identity. Since we are interested in investigating how conversations about gender differed between the two racial groups, we will join the two words into one. For example, color boi will be collapsed to colorboi, to see which words are used with this subject.
Overall, there's a wider variety of words that the black soldiers are using in their long responses.
With bi-grams, we will explore which words are paired with gender related words.
#load dictionary csv
gender_words <- fread("~/git/dspg2020amsoldier/data/dictionary/gender.csv", sep = ",")
gender_match <- paste0("\\b", gender_words$gender, "\\b", collapse = "|") #regex friendly
####collapse certain two word phrases like white man --> whiteman
#read the lookup table of phrases (`original`) and their collapsed form (`collapse`)
collapse <- fread("~/git/dspg2020amsoldier/data/dictionary/collapse_words.csv", sep = ",")
#word boundaries keep a phrase from matching inside a longer word
collapse <- mutate(collapse, original = paste0("\\b", original, "\\b"))
#replace with collapsed words (vectorize_all = FALSE applies every pattern to every string)
data$long <- stri_replace_all_regex(data$long, collapse$original, collapse$collapse, vectorize_all = FALSE)
data$outfits_comment <- stri_replace_all_regex(data$outfits_comment, collapse$original, collapse$collapse, vectorize_all = FALSE)
#subset data based on question and race
#seq_len() instead of 1:nrow() so an empty subset does not produce ids c(1, 0)
S32W_short2 <- data %>%
  filter(racial_group == "white") %>%
  select(outfits_comment) %>%
  filter(!is.na(outfits_comment))
S32W_short2 <- tibble(nrow = seq_len(nrow(S32W_short2)), text = S32W_short2$outfits_comment)
S32W_long2 <- data %>%
  filter(racial_group == "white") %>%
  select(long) %>%
  filter(!is.na(long))
S32W_long2 <- tibble(nrow = seq_len(nrow(S32W_long2)), text = S32W_long2$long)
S32N_long2 <- data %>%
  filter(racial_group == "black") %>%
  select(long) %>%
  filter(!is.na(long))
S32N_long2 <- tibble(nrow = seq_len(nrow(S32N_long2)), text = S32N_long2$long)
#### unnest tokens into bigrams
#### counts bigrams, but filter for those that have words in the gender dictionary.
#### separates bigrams into word1 and word 2 column
#### removes rows that contain a stopword
#### lem then stem words
#### finally count the bigrams.
# Gender-dictionary bigrams in the S32W_short2 text
short_bigrams_gender2 <- S32W_short2 %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(gender_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Gender-dictionary bigrams in the S32W_long2 text
longW_bigrams_gender2 <- S32W_long2 %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(gender_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Gender-dictionary bigrams in the S32N_long2 text
longN_bigrams_gender2 <- S32N_long2 %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(gender_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Bigram network built from short_bigrams_gender2 (collapsed-phrase version)
visualize_bigrams(short_bigrams_gender2, "White Soldiers' Outfits Response - Gender Words")
White soldiers short response:
# Bigram network built from longW_bigrams_gender2 (collapsed-phrase version)
visualize_bigrams(longW_bigrams_gender2, "White Soldiers' Long Response - Gender Words")
White soldiers long response:
White soldiers rarely talk about black men or women. The one occurrence of "colorman" that does exist is paired with "despise".
We also see that white soldiers are talking about interracial marriage.
White commentary on the word woman is paired with words relating to women in the workforce, such as worker, and waac and auxiliari, which refer to the Women's Army Corps.
# Bigram network built from longN_bigrams_gender2 (collapsed-phrase version)
visualize_bigrams(longN_bigrams_gender2, "Black Soldiers' Long Response - Gender Words")
Black soldier long response:
A commonality between the black and white soldier commentary regarding females is that their role as wife or mother is prevalent.
When talking about gender(both male and female) there is more commentary specifying racial groups as opposed to how white soldiers are talking about gender in their long responses. A notable bi-gram is the series of words following the node of whitewoman: whitewoman->holler->rape. In addition, when the white men are talked about, they are most frequently paired with the adjective southern. This is relating to commentary of how southern white men treated black men at the time.
colorwoman used with the words whip and vice
In all of the responses, the prevalence of familial words such as mother, father, brother, sister shows that soldiers frequently discussed family.
With co-occurrences we want to measure the proximity of words that occur with gender related words. The co-occurring words do not have to be directly before or after the gender word of interest.
The co-occurrences are measured with Pearson's correlation coefficient, which measures how strongly the occurrences of two words are correlated.
The threshold for the long responses is words that occur at least 5 times and correlations greater than .15. The threshold for the short responses is lower than that of the long responses because it is a smaller body of text: correlations greater than .10.
# Lemmatize and stem each dictionary term individually, then rebuild the
# alternation regex, so the pattern matches the lemmatized/stemmed tokens it is
# compared against. Passing the already-joined regex to the stemmer treats the
# whole string as a single word, so the individual terms are never stemmed.
gender_match <- paste0(
  "\\b",
  unique(stem_words(lemmatize_words(gender_words$gender))),
  "\\b",
  collapse = "|"
)
# Same pipeline as cooccur_gender(): words occurring at least 5 times, pairs
# whose first word matches the gender dictionary. The helper's cut-off is
# inclusive, so the trailing filter keeps this chunk's strict "> .10".
S32W_short_cors <- cooccur_gender(S32W_short2, gender_match, n = 5, corr = .10) %>%
  filter(correlation > .10)
#plot
# visualize_cooccur() draws the same layers and fixes the random seed, so the
# network layout is identical on every render.
visualize_cooccur(S32W_short_cors, "White Soldiers' Short Response - Gender Related Co-occurences")
Only a small handful of the outfits questions mention a word relating to the female gender. Because the settings filter for words that occur at least 5 times in the data, these occurrences don't show.
An interesting observation is that colored men are correlated with the word jealousy and whiteman is correlated with the word clean. This provides further insight into the sentiment that white soldiers had towards themselves versus black soldiers. Furthermore, the strong correlation between whiteman and negro implies that the relationship between the two was talked about often.
Below, no word-count threshold is applied, in order to show co-occurrences that mention female-related words.
# No minimum word count in this version: n = 1 keeps every word, which is
# equivalent to skipping the count filter. The helper's cut-off is inclusive,
# so the trailing filter keeps this chunk's strict "> .10".
S32W_short_cors <- cooccur_gender(S32W_short2, gender_match, n = 1, corr = .10) %>%
  filter(correlation > .10)
#plot
# visualize_cooccur() draws the same layers and fixes the random seed, so the
# network layout is identical on every render.
visualize_cooccur(S32W_short_cors, "White Soldiers' Short Response - Gender Related Co-occurences")
It shows that wife has a great number of correlates, however, there is only one comment that mentions wife, and therefore, is not representative of the overall discussion of wives in relation to the question.
# Print the outfits comment stored at position 5726 of `data`.
# NOTE(review): hard-coded row index; presumably the single comment mentioning
# "wife" - it will point elsewhere if the table's row order changes.
data$outfits_comment[5726]
## [1] "no country in the world is so blesses as the united states. we lead our individual lives to our liking. yes that is worth fighting for. the mark we attain is quided largely by how we do our job if you are a bum it is your own fault if you are an ambitious man you profit. i have been married nearly 2 yrs have a fine child naturally under these conditions i long to be with my wife. i do believe if i were a single man i would put my heart in my fighting and would want to see action. but even so i realize there is a hard job to do and want to do my bit rather than be a slacker."
Upon closer inspection, this comment seems to be an error in data storage and is more related to the long response question.
# Same pipeline as cooccur_gender(): words occurring at least 5 times, pairs
# whose first word matches the gender dictionary. The helper's cut-off is
# inclusive, so the trailing filter keeps this chunk's strict "> .15".
S32W_long_cors <- cooccur_gender(S32W_long2, gender_match, n = 5, corr = .15) %>%
  filter(correlation > .15)
#plot
# visualize_cooccur() draws the same layers and fixes the random seed, so the
# network layout is identical on every render.
visualize_cooccur(S32W_long_cors, "White Soldiers' Long Response - Gender Related Co-occurences")
# Same pipeline as cooccur_gender(): words occurring at least 5 times, pairs
# whose first word matches the gender dictionary, correlation >= .15.
S32N_long_cors <- cooccur_gender(S32N_long2, gender_match, n = 5, corr = .15)
#plot
# visualize_cooccur() draws the same layers and fixes the random seed, so the
# network layout is identical on every render.
visualize_cooccur(S32N_long_cors, "Black Soldiers' Long Response - Gender Related Co-occurences")
### try networkD3
# colnames(S32N_long_cors) <- c("Source", "Target", "Weight")
# S32N_long_cors <- filter(S32N_long_cors, correlation > .20)
# simpleNetwork(S32N_long_cors)
When talking about whitemen specifically, the correlated words show that the soldiers talk about relations to black men and fighting. When talking about women, there is a distinction made between white and black women. Other than those clusters of words, women are often talked about as family members and in relation to home life.
#split the dictionary so we can analyze male and female separately
female_words <- gender_words %>% filter(category == "female")
female_match <- paste0("\\b", female_words$gender, "\\b", collapse = "|") #regex friendly
male_words <- gender_words %>% filter(category == "male")
male_match <- paste0("\\b", male_words$gender, "\\b", collapse = "|") #regex friendly
#lookup table of phrases (`original`) and their collapsed form (`collapse`)
collapse <- fread("~/git/dspg2020amsoldier/data/dictionary/collapse_words.csv", sep = ",")
#word boundaries keep a phrase from matching inside a longer word
collapse <- mutate(collapse, original = paste0("\\b", original, "\\b"))
#make sure replace with collapsed words
data$long <- stri_replace_all_regex(data$long, collapse$original, collapse$collapse, vectorize_all = FALSE)
data$outfits_comment <- stri_replace_all_regex(data$outfits_comment, collapse$original, collapse$collapse, vectorize_all = FALSE)
#all non-missing long responses, regardless of racial group
S32_long <- data %>% select(long) %>% filter(!is.na(long))
#seq_len() instead of 1:nrow() so an empty table does not produce ids c(1, 0)
S32_long <- tibble(nrow = seq_len(nrow(S32_long)), text = S32_long$long)
# Bigrams in the S32_long text that contain a female-dictionary word
long_bigrams_female <- S32_long %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(female_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word) %>%
  filter(!word2 %in% stop_words$word) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Bigram network of the female-word bigrams in long_bigrams_female
visualize_bigrams(long_bigrams_female, "Soldiers' Long Response - Female Words Bigrams")
The most frequent pairs of words used when talking about the female gender are familial roles such as wife and relations to family like dad.
colorwoman -> whip
# Print the long response whose row id (`nrow`) is 690.
# NOTE(review): hard-coded id; it depends on the current row order of S32_long.
filter(S32_long,nrow == 690)$text
## [1] "in being in a camp white pvt. would not want negro non comisions officers to give them orders. we does not mind fighting if we get equal rights and privlage just like the whites. in the south a negro have to always go to the back of all hotels but whites goes in negro places anywhere they want to. i feel that is we are going to get equal rights we should get them now. it would make is feel more like we have some thing to fight for. but like it is now. it might turn out just like it did in the other war. after the war less privlages and right than before the war started. they are still whipping colorwoman on the buses in the south and lynching negroman on suspicion."
negrowoman -> graduat & negrowoman -> attain
# Print the long response whose row id (`nrow`) is 3356.
# NOTE(review): hard-coded id; it depends on the current row order of S32_long.
filter(S32_long,nrow == 3356)$text
## [1] "in my opinion the army is much fairer than the navy in its treatment of the races. the army graduate negrowoman from its officers school in iowa. i have my first time to learn of a negrowoman attaining the rank of a lieutenant in the army. the army give qualified negro a chance to graduate from west point and become high ranking officers. the navy railroads all qualified negro out of annapolis. i have a word to say about our defense industries too. why thousands of american soldiers are dying in battle they are many narrowminded industries in the south who and are denying qualified negro from doing much more than they really are dying to do."
This response shows that despite the segregation of black and white Americans, there were some opportunities available to Black women at the time. The following link explains the acceptance of Black women into the army training school in Des Moines, Iowa: http://www.iowapbs.org/iowapathways/mypath/female-black-officers-train-des-moines-world-war-ii
Try above analysis again but with dictionary including relationship words
# Female dictionary extended with the "relation" category plus the word "like"
female_words <- gender_words %>%
  filter(category == "female" | category == "relation") %>%
  add_row(gender = "like", category = "relation")
female_match <- paste0("\\b", female_words$gender, "\\b", collapse = "|") #regex friendly
# Partners that a "like" bigram / pair must have in order to be kept later on
keep_female <- c(gender_words$gender, "coloredsoldi", "negrosoldi", "white", "negro")
S32_long <- data %>% select(long) %>% filter(!is.na(long))
# seq_len() instead of 1:nrow() so an empty table does not produce ids c(1, 0)
S32_long <- tibble(nrow = seq_len(nrow(S32_long)), text = S32_long$long)
# Stop words minus "like", "with" and "liked", which stay in the bigrams here
bigram_stop <- setdiff(stop_words$word, c("like", "with", "liked"))
long_bigrams_female <- S32_long %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(female_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% bigram_stop) %>%
  filter(!word2 %in% bigram_stop) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Keep a bigram containing "like" only when its other word is in keep_female:
# first for "like" as word1, then for "like" as word2.
long_bigrams_female <- long_bigrams_female[
  long_bigrams_female$word1 != "like" | long_bigrams_female$word2 %in% keep_female,
]
long_bigrams_female <- long_bigrams_female[
  long_bigrams_female$word2 != "like" | long_bigrams_female$word1 %in% keep_female,
]
visualize_bigrams(long_bigrams_female, "Soldiers' Long Response - Female Words Bigrams")
Now, when including like as a relationship word, we can see at the like node there is a chain of "whiteman like colorwoman." There are also instances of whiteman are like black men/soldiers. This is either relating the two in similarity or in emotion of likeness between the two.
Another interesting addition to the network with relationship words is the chain of bigrams "sex intercourse with whitewoman". A hot topic was white soldiers being concerned with black soldiers having sexual relations with 'their' whitewomen. Likewise, black soldiers were aware of the tension of being falsely accused for raping or having sex with white women.
Otherwise, themes of family are prevalent for women's role in the commentary of the soldiers. We can also see the different pairings with the word woman, which seem to talk about their roles as workers amongst other attributes (worker, waac, auxiliary).
In similarity with the previous versions of the bigram networks, th
Create the co-occurrence network for the same data.
# Female/relation word correlations over all long responses
long_fem_co <- cooccur_gender(S32_long, female_match, n = 5, corr = .12)
# Keep a pair containing "like" only when its partner is in keep_female:
# first for "like" as item1, then for "like" as item2.
long_fem_co <- long_fem_co[
  long_fem_co$item1 != "like" | long_fem_co$item2 %in% keep_female,
]
long_fem_co <- long_fem_co[
  long_fem_co$item2 != "like" | long_fem_co$item1 %in% keep_female,
]
visualize_cooccur(long_fem_co, "Female related co-occurrences for long soldier response")
Themes of family are still prevalent. Negrowoman is associated with some negative words like filthy and invade. Whitewoman's associations with the words intercourse, cry and molest are related to the controversy of black men having sexual relations with white women. In addition, whitewoman is associated with negroman. This shows that there is commentary on the association of black men and white women, which was controversial at the time.
# Female dictionary extended with the "relation" category plus the word "like"
female_words <- gender_words %>%
  filter(category == "female" | category == "relation") %>%
  add_row(gender = "like", category = "relation")
female_match <- paste0("\\b", female_words$gender, "\\b", collapse = "|") #regex friendly
# Partners that a "like" bigram / pair must have in order to be kept later on
keep_female <- c(gender_words$gender, "coloredsoldi", "negrosoldi", "white", "negro")
# Non-missing long responses where racial_group is "black"
S32N_long <- data %>% select(racial_group, long) %>% filter(!is.na(long), racial_group == "black")
# seq_len() instead of 1:nrow() so an empty table does not produce ids c(1, 0)
S32N_long <- tibble(nrow = seq_len(nrow(S32N_long)), text = S32N_long$long)
# Stop words minus "like", "with" and "liked", which stay in the bigrams here
bigram_stop <- setdiff(stop_words$word, c("like", "with", "liked"))
long_bigrams_female <- S32N_long %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(female_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% bigram_stop) %>%
  filter(!word2 %in% bigram_stop) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Keep a bigram containing "like" only when its other word is in keep_female:
# first for "like" as word1, then for "like" as word2.
long_bigrams_female <- long_bigrams_female[
  long_bigrams_female$word1 != "like" | long_bigrams_female$word2 %in% keep_female,
]
long_bigrams_female <- long_bigrams_female[
  long_bigrams_female$word2 != "like" | long_bigrams_female$word1 %in% keep_female,
]
visualize_bigrams(long_bigrams_female, "Black Soldiers' Long Response - Female Words Bigrams")
Create the co-occurrence network for the same data.
# Female/relation word correlations over the S32N_long responses
long_fem_co <- cooccur_gender(S32N_long, female_match, n = 5, corr = .12)
# Keep a pair containing "like" only when its partner is in keep_female:
# first for "like" as item1, then for "like" as item2.
long_fem_co <- long_fem_co[
  long_fem_co$item1 != "like" | long_fem_co$item2 %in% keep_female,
]
long_fem_co <- long_fem_co[
  long_fem_co$item2 != "like" | long_fem_co$item1 %in% keep_female,
]
visualize_cooccur(long_fem_co, "Female related co-occurrences for long Black soldiers' responses")
One of the strongest correlations is between negrowoman and negroman, as well as between negroman and whitewoman. This indicates that interracial relations are one of the most heavily correlated topics to women in the Black soldiers' responses. In addition, mother, sister and father have some of the strongest correlations, showing that women's most prevalent role for the soldiers was in their families.
# Female dictionary extended with the "relation" category plus the word "like"
female_words <- gender_words %>%
  filter(category == "female" | category == "relation") %>%
  add_row(gender = "like", category = "relation")
female_match <- paste0("\\b", female_words$gender, "\\b", collapse = "|") #regex friendly
# Partners that a "like" bigram / pair must have in order to be kept later on
keep_female <- c(gender_words$gender, "coloredsoldi", "negrosoldi", "white", "negro")
# Non-missing long responses where racial_group is "white"
S32W_long <- data %>% select(racial_group, long) %>% filter(!is.na(long), racial_group == "white")
# seq_len() instead of 1:nrow() so an empty table does not produce ids c(1, 0)
S32W_long <- tibble(nrow = seq_len(nrow(S32W_long)), text = S32W_long$long)
# Stop words minus "like", "with" and "liked", which stay in the bigrams here
bigram_stop <- setdiff(stop_words$word, c("like", "with", "liked"))
long_bigrams_female <- S32W_long %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(female_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% bigram_stop) %>%
  filter(!word2 %in% bigram_stop) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Keep a bigram containing "like" only when its other word is in keep_female:
# first for "like" as word1, then for "like" as word2.
long_bigrams_female <- long_bigrams_female[
  long_bigrams_female$word1 != "like" | long_bigrams_female$word2 %in% keep_female,
]
long_bigrams_female <- long_bigrams_female[
  long_bigrams_female$word2 != "like" | long_bigrams_female$word1 %in% keep_female,
]
visualize_bigrams(long_bigrams_female, "White Soldiers' Long Response - Female Words Bigrams")
We see a complete lack of narrative in black women specifically. Conversations about women are mostly about their role in the family as a mother, wife or sister. Another common conversation is about working women, with pairings to WAAC, auxiliary (referring to WAAC) and worker.
# Female/relation word correlations over the S32W_long responses
long_fem_co <- cooccur_gender(S32W_long, female_match, n = 5, corr = .15)
# Keep a pair containing "like" only when its partner is in keep_female:
# first for "like" as item1, then for "like" as item2.
long_fem_co <- long_fem_co[
  long_fem_co$item1 != "like" | long_fem_co$item2 %in% keep_female,
]
long_fem_co <- long_fem_co[
  long_fem_co$item2 != "like" | long_fem_co$item1 %in% keep_female,
]
visualize_cooccur(long_fem_co, "Female related co-occurrences for long White soldiers' responses")
# Whole gender dictionary (no category filter in this chunk) plus the word "like"
female_words <- gender_words %>%
  add_row(gender = "like", category = "relation") #%>% filter(category == "female" | category == "relation")
female_match <- paste0("\\b", female_words$gender, "\\b", collapse = "|") #regex friendly
# Partners that a "like" bigram / pair must have in order to be kept later on
keep_female <- c(gender_words$gender, "coloredsoldi", "negrosoldi", "white", "negro")
# All non-missing outfits comments, regardless of racial group
S32_short <- data %>% select(outfits_comment) %>% filter(!is.na(outfits_comment))
# seq_len() instead of 1:nrow() so an empty table does not produce ids c(1, 0)
S32_short <- tibble(nrow = seq_len(nrow(S32_short)), text = S32_short$outfits_comment)
# Stop words minus "like", "with" and "liked", which stay in the bigrams here
bigram_stop <- setdiff(stop_words$word, c("like", "with", "liked"))
short_bigrams_female <- S32_short %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  count(bigram, sort = TRUE) %>%
  filter(grepl(female_match, bigram)) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% bigram_stop) %>%
  filter(!word2 %in% bigram_stop) %>%
  mutate(word1 = textstem::lemmatize_words(word1), word2 = textstem::lemmatize_words(word2)) %>%
  mutate(word1 = wordStem(word1), word2 = wordStem(word2)) %>%
  # Sum the raw bigram counts of rows that became identical after stemming;
  # wt = n makes that weighting explicit instead of relying on dplyr's default.
  count(word1, word2, wt = n, sort = TRUE)
## Using `n` as weighting variable
## ℹ Quiet this message with `wt = n` or count rows with `wt = 1`
# Keep a bigram containing "like" only when its other word is in keep_female:
# first for "like" as word1, then for "like" as word2.
short_bigrams_female <- short_bigrams_female[
  short_bigrams_female$word1 != "like" | short_bigrams_female$word2 %in% keep_female,
]
short_bigrams_female <- short_bigrams_female[
  short_bigrams_female$word2 != "like" | short_bigrams_female$word1 %in% keep_female,
]
visualize_bigrams(short_bigrams_female, "Soldiers' Short Response - Female Words Bigrams")
# Gender/relation word correlations over the short (outfits) responses
short_fem_co <- cooccur_gender(S32_short, female_match, n = 5, corr = .12)
# Keep a pair containing "like" only when its partner is in keep_female:
# first for "like" as item1, then for "like" as item2.
short_fem_co <- short_fem_co[
  short_fem_co$item1 != "like" | short_fem_co$item2 %in% keep_female,
]
short_fem_co <- short_fem_co[
  short_fem_co$item2 != "like" | short_fem_co$item1 %in% keep_female,
]
# Plot the short-response table computed above; plotting long_fem_co here
# would show the long-response network under the short-response title.
visualize_cooccur(short_fem_co, "Female related co-occurrences for short soldier response")
Interesting commentary from a black soldier on race relations
# Print the long response stored at position 346 of `data`.
# NOTE(review): hard-coded row index; it depends on the table's current row order.
data$long[346]
## [1] "one thing there is to much seggreation in the armored forces. for a country at war it is almost as if negro were fighting whites over here. i live with whites at home and get along with them very well in school in the streets and at dances. i even have a white girlfriend. i find there is no difference because they are a shade lighter than my race. we will all die together not one colored killed here and a white kill there. we must win this war. in order to preserve freedom of speech of religion and to get along with your neighbor."